# Load in rooted phylogenetic tree!
load("data/03_Phylogenetic_Tree/phytree_preprocessed_physeq.RData")
unrooted_physeq## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 1736 taxa and 12 samples ]
## sample_data() Sample Data: [ 12 samples by 23 sample variables ]
## tax_table() Taxonomy Table: [ 1736 taxa by 9 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 1736 tips and 1734 internal nodes ]
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 1736 taxa and 12 samples ]
## sample_data() Sample Data: [ 12 samples by 23 sample variables ]
## tax_table() Taxonomy Table: [ 1736 taxa by 9 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 1736 tips and 1735 internal nodes ]
# Sequence depth will inform us on how we want to normalize our data
# Calculate the total number of reads per sample
raw_total_seqs_df <-
unrooted_physeq %>%
# Calculate the sample read sums
sample_sums() %>%
data.frame()
# Name the column
colnames(raw_total_seqs_df)[1] <- "TotalSeqs"
head(raw_total_seqs_df)## TotalSeqs
## 568_4 24503
## 568_5 17303
## 581_5 15963
## 611_5 13893
## E03_5 7131
## E05_5 45378
### scale_reads function and matround function
####################################################################################
# Function to scale reads: http://deneflab.github.io/MicrobeMiseq/
# Scales reads by
# 1) taking proportions
# 2) multiplying by a given library size of n
# 3) rounding
# Default for n is the minimum sample size in your library
# Default for round is floor
matround <- function(x){trunc(x+0.5)}
scale_reads <- function(physeq, n = min(sample_sums(physeq)), round = "round") {
# transform counts to n
physeq.scale <- transform_sample_counts(physeq, function(x) {(n * x/sum(x))})
# Pick the rounding functions
if (round == "floor"){
otu_table(physeq.scale) <- floor(otu_table(physeq.scale))
} else if (round == "round"){
otu_table(physeq.scale) <- round(otu_table(physeq.scale))
} else if (round == "matround"){
otu_table(physeq.scale) <- matround(otu_table(physeq.scale))
}
# Prune taxa and return new phyloseq object
physeq.scale <- prune_taxa(taxa_sums(physeq.scale) > 0, physeq.scale)
return(physeq.scale)
}Rescale all reads so they all represent the count of the lowest number of sequence reads. We will expect each sample to have # of reads around 2200
This is where one might decide to use rarefaction to normalize the data.
## [1] 7131
# Scale reads by the above function
scaled_rooted_physeq <-
midroot_physeq %>%
scale_reads(round = "matround")
# Calculate read depth
## Look at total number of sequences in each sample and compare to what we had before
scaled_total_seqs_df <-
scaled_rooted_physeq %>%
sample_sums() %>%
data.frame()
head(scaled_total_seqs_df)## .
## 568_4 7134
## 568_5 7126
## 581_5 7129
## 611_5 7148
## E03_5 7131
## E05_5 7143
# Change first column name to be "TotalSeqs"
colnames(scaled_total_seqs_df)[1] <- "TotalSeqs"
# Inspect
head(scaled_total_seqs_df)## TotalSeqs
## 568_4 7134
## 568_5 7126
## 581_5 7129
## 611_5 7148
## E03_5 7131
## E05_5 7143
# Check range of data
min_seqs <-
min(scaled_total_seqs_df)
max_seqs <-
max(scaled_total_seqs_df)
# Range of seqs
max_seqs - min_seqs## [1] 22
# Plot histogram
scaled_total_seqs_df %>%
ggplot(aes(x = TotalSeqs)) +
geom_histogram(bins = 50) +
scale_x_continuous(limits = c(0, 10000)) +
labs(title = "Scaled Sequencing Depth at 7131") +
theme_bw()## Warning: Removed 2 rows containing missing values (`geom_bar()`).
## TotalSeqs
## 568_4 7134
## 568_5 7126
## 581_5 7129
## 611_5 7148
## E03_5 7131
## E05_5 7143
Exploratory analyses from the Paily & Shankar (2016) paper, which is using unconstrained ordination methods like PCoA.
# Calculate sorenson dissimularity: Abundance-unweighted of shared taxa
scaled_soren_pcoa <-
ordinate(
physeq = scaled_rooted_physeq,
method = "PCoA",
distance = "bray", binary = TRUE)
#str(scaled_soren_pcoa)
# Plot the ordination
soren_gut_section_pcoa <- plot_ordination(
physeq = scaled_rooted_physeq,
ordination = scaled_soren_pcoa,
color = "gut_section",
shape = "gut_section",
title = "Sorensen PCoA") +
geom_point(size=5, alpha = 0.5, aes(color = gut_section)) +
scale_shape_manual(values = c(15,16)) +
scale_color_manual(values = gut_section_colors) +
theme_bw()
# Show the plot
soren_gut_section_pcoaHere, we are evaluating the shared taxa by presence/absence (abundance-unweighted) in the Sorensen metric.
Note that:
This means we explain 45% of the variation in the data in these two axes.
# Calculate the BC distance
scaled_BC_pcoa <-
ordinate(
physeq = scaled_rooted_physeq,
method = "PCoA",
distance = "bray")
# Plot the PCoA
bray_gut_section_pcoa <-
plot_ordination(
physeq = scaled_rooted_physeq,
ordination = scaled_BC_pcoa,
color = "gut_section",
shape = "gut_section",
title = "Bray-Curtis PCoA") +
geom_point(size=5, alpha = 0.5, aes(color = gut_section)) +
scale_shape_manual(values = c(15,16)) +
scale_color_manual(values = c(gut_section_colors)) +
theme_bw()
bray_gut_section_pcoaHere, we are evaluating the shared taxa and then weighting them by their abundances, which provides more influence for species that are more abundant.
Note that:
This means we explain 44% of the variation in the data in these two axes, which is almost exactly the same as the previous plot with the Sorensen Dissimilarity. Abundance does NOT seem to have an influence!!
# Calculate the BC distance
scaled_wUNI_pcoa <-
ordinate(
physeq = scaled_rooted_physeq,
method = "PCoA",
distance = "wunifrac")
# Plot the PCoA
wUNI_gut_section_pcoa <-
plot_ordination(
physeq = scaled_rooted_physeq,
ordination = scaled_wUNI_pcoa,
color = "gut_section",
shape = "gut_section",
title = "Weighted Unifrac PCoA") +
geom_point(size=5, alpha = 0.5, aes(color = gut_section)) +
scale_shape_manual(values = c(15,16,17,18, 19, 20)) +
scale_color_manual(values = c(gut_section_colors)) +
theme_bw()
wUNI_gut_section_pcoaHere, we are evaluating the shared taxa and then weighting them by their abundances, which provides more influence for species that are more abundant.
Note that:
This means we explain 82% of the variation in the data in these two axes (!!!), which is significantly more than the previous plots with the taxonomic dissimilarity measures. (Almost double.) Here, phylogeny seems to be very important! This means that taxa that are abundant are found in different places in the phylogenetic tree. Therefore, the evoultionary distances (aka the branch lengths) and their abundances seem to have a major influence!!
Let’s plot all three together into one plot to have a concise visualization of the three metrics.
(soren_gut_section_pcoa + theme(legend.position = "none")) +
(bray_gut_section_pcoa + theme(legend.position = "none")) +
(wUNI_gut_section_pcoa + theme(legend.position = "none"))Since we did 3 of the dissimilarity metrics for the PCoA, let’s just plot one example of them for the NMDS plotting. Here, we will use weighted Unifrac
# Calculate the Weighted Unifrac distance
scaled_wUNI_nmds <-
ordinate(
physeq = scaled_rooted_physeq,
method = "NMDS",
distance = "wunifrac")## Run 0 stress 0.02259718
## Run 1 stress 0.04058445
## Run 2 stress 0.0225971
## ... New best solution
## ... Procrustes: rmse 0.0004627231 max resid 0.0009222004
## ... Similar to previous best
## Run 3 stress 0.02246093
## ... New best solution
## ... Procrustes: rmse 0.03228751 max resid 0.06202915
## Run 4 stress 0.1975755
## Run 5 stress 0.02120173
## ... New best solution
## ... Procrustes: rmse 0.05206484 max resid 0.1470468
## Run 6 stress 0.02259701
## Run 7 stress 0.04365801
## Run 8 stress 0.02246088
## Run 9 stress 0.02246096
## Run 10 stress 0.04365804
## Run 11 stress 0.02259724
## Run 12 stress 0.02120183
## ... Procrustes: rmse 0.0002361478 max resid 0.0005617042
## ... Similar to previous best
## Run 13 stress 0.02259718
## Run 14 stress 0.02259705
## Run 15 stress 0.04365803
## Run 16 stress 0.02120177
## ... Procrustes: rmse 0.0001857661 max resid 0.0004386585
## ... Similar to previous best
## Run 17 stress 0.0406359
## Run 18 stress 0.3359234
## Run 19 stress 0.02246106
## Run 20 stress 0.02120183
## ... Procrustes: rmse 0.0001260278 max resid 0.0003101023
## ... Similar to previous best
## *** Best solution repeated 3 times
# Plot the PCoA
wUNI_gut_section_nmds <-
plot_ordination(
physeq = scaled_rooted_physeq,
ordination = scaled_wUNI_nmds,
color = "gut_section",
shape = "gut_section",
title = "Weighted Unifrac NMDS") +
geom_point(size=5, alpha = 0.5, aes(color = gut_section)) +
scale_shape_manual(values = c(15,16)) +
scale_color_manual(values = c(gut_section_colors)) +
theme_bw()
wUNI_gut_section_nmdsWe can see from above the plot that the stress value is ~0.02, which is very acceptable. And, It seems important to emphasize that the PCoA and the NMDS plot both look pretty similar!
In this case, I would always prefer to report the PCoA results because they are linear and provide a lot more post-hoc analyses to follow up with. In addition, it’s helpful to only have 2 axes of variation and show how much variation is explained.
(wUNI_gut_section_pcoa + theme(legend.position = "none")) +
(wUNI_gut_section_nmds + theme(legend.position = "none"))# Calculate all three of the distance matrices
scaled_sorensen_dist <- phyloseq::distance(scaled_rooted_physeq, method = "bray", binary = TRUE)
scaled_bray_dist <- phyloseq::distance(scaled_rooted_physeq, method = "bray")
scaled_wUnifrac_dist <- phyloseq::distance(scaled_rooted_physeq, method = "wunifrac")
# make a data frame from the sample_data
# All distance matrices will be the same metadata because they
# originate from the same phyloseq object.
metadata <- data.frame(sample_data(scaled_rooted_physeq))
# Adonis test
# In this example we are testing the hypothesis that the five stations
# that were collected have different centroids in the ordination space
# for each of the dissimilarity metrics, we are using a discrete variable
adonis2(scaled_sorensen_dist ~ gut_section, data = metadata)## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_sorensen_dist ~ gut_section, data = metadata)
## Df SumOfSqs R2 F Pr(>F)
## gut_section 1 0.6992 0.22161 2.847 0.003 **
## Residual 10 2.4559 0.77839
## Total 11 3.1551 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_bray_dist ~ gut_section, data = metadata)
## Df SumOfSqs R2 F Pr(>F)
## gut_section 1 0.77401 0.25492 3.4213 0.006 **
## Residual 10 2.26233 0.74508
## Total 11 3.03633 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_wUnifrac_dist ~ gut_section, data = metadata)
## Df SumOfSqs R2 F Pr(>F)
## gut_section 1 0.30891 0.55059 12.251 0.002 **
## Residual 10 0.25214 0.44941
## Total 11 0.56106 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Note that:
Above, we see that the most variation is explained by the weighted unifrac, which explains ~55% of the variation in the data and also has the highest F-statistic.
# We might also care about other variables
# Here, we will add date and fraction as variables
# multiplicative model ORDER MATTERS!
adonis2(scaled_sorensen_dist ~ gut_section * year, data = metadata)## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_sorensen_dist ~ gut_section * year, data = metadata)
## Df SumOfSqs R2 F Pr(>F)
## gut_section 1 0.69920 0.22161 2.8814 0.008 **
## year 1 0.22289 0.07064 0.9185 0.512
## gut_section:year 1 0.29179 0.09248 1.2025 0.203
## Residual 8 1.94124 0.61527
## Total 11 3.15512 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_bray_dist ~ gut_section * year, data = metadata)
## Df SumOfSqs R2 F Pr(>F)
## gut_section 1 0.77401 0.25492 3.4167 0.003 **
## year 1 0.21379 0.07041 0.9437 0.488
## gut_section:year 1 0.23623 0.07780 1.0428 0.341
## Residual 8 1.81231 0.59687
## Total 11 3.03633 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_wUnifrac_dist ~ gut_section * year, data = metadata)
## Df SumOfSqs R2 F Pr(>F)
## gut_section 1 0.30891 0.55059 11.3379 0.003 **
## year 1 0.00562 0.01002 0.2063 0.955
## gut_section:year 1 0.02856 0.05090 1.0481 0.346
## Residual 8 0.21797 0.38850
## Total 11 0.56106 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_wUnifrac_dist ~ year * gut_section, data = metadata)
## Df SumOfSqs R2 F Pr(>F)
## year 1 0.02068 0.03686 0.7590 0.508
## gut_section 1 0.29385 0.52375 10.7852 0.003 **
## year:gut_section 1 0.02856 0.05090 1.0481 0.350
## Residual 8 0.21797 0.38850
## Total 11 0.56106 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
We can also run tests that include additive (+) or multipliciatve models, which include the interaction term between variables.
The PERMANOVA is sensitive to variance/dispersion in the data. Therefore, we need to run a homogeneity of dispersion test to test for the sensitivity of our PERMANOVA results to variance.
# Homogeneity of Disperson test with beta dispr
# Sorensen
beta_soren_gut_section <- betadisper(scaled_sorensen_dist, metadata$gut_section)
permutest(beta_soren_gut_section)##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 1 0.005544 0.0055438 1.4467 999 0.262
## Residuals 10 0.038320 0.0038320
# Bray-curtis
beta_bray_gut_section <- betadisper(scaled_bray_dist, metadata$gut_section)
permutest(beta_bray_gut_section)##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 1 0.000027 0.0000270 0.0051 999 0.937
## Residuals 10 0.053303 0.0053303
# Weighted Unifrac
beta_bray_gut_section <- betadisper(scaled_wUnifrac_dist, metadata$gut_section)
permutest(beta_bray_gut_section)##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 1 0.009239 0.0092386 1.5508 999 0.26
## Residuals 10 0.059573 0.0059573
Above, our variance is not impacted by gut section.
# Set the phylum colors
phylum_colors <- c(
Acidobacteriota = "navy",
Actinobacteriota = "darkslategray2",
Armatimonadota = "deeppink1",
Alphaproteobacteria = "plum2",
Bacteroidota = "gold",
Betaproteobacteria = "plum1",
Bdellovibrionota = "red1",
Chloroflexi="black",
Crenarchaeota = "firebrick",
Cyanobacteria = "limegreen",
Deltaproteobacteria = "grey",
Desulfobacterota="magenta",
Fibrobacterota = "darkgreen",
Firmicutes = "#3E9B96",
Gammaproteobacteria = "greenyellow",
"Marinimicrobia (SAR406 clade)" = "yellow",
Myxococcota = "#B5D6AA",
Nitrospirota = "palevioletred1",
Proteobacteria = "royalblue",
Planctomycetota = "darkorange",
Spirochaetota = "olivedrab",
Thermoplasmatota = "green",
Verrucomicrobiota = "darkorchid1")# Goal is to calculate the phylum relative abundance
# Note: the read depth must be normalized in some way: scaled_reads
phylum_df <-
scaled_rooted_physeq %>%
# agglomerate at the phylum level
tax_glom(taxrank = "Phylum") %>%
# Transform counts to relative abundance
transform_sample_counts(function (x) {x/sum(x)}) %>%
# Melt to a long format
psmelt() %>%
# Filter out phyla that are less than one percent - get rid of low abundant Phyla
dplyr::filter(Abundance > 0.01)
# fix the order of date
# Stacked bar plot with all Phyla
# Plot Phylum Abundances - make sure to load phylum_colors
phylum_df %>%
# Warning: Its important to have one sample per x value,
# Otherwise, it will take the sum between multiple samples
ggplot(aes(x = names, y = Abundance, fill = Phylum)) +
facet_grid(.~gut_section) +
geom_bar(stat = "identity", color = "black") +
labs(title = "Gut Section Phylum Composition") +
scale_fill_manual(values = phylum_colors) +
theme_bw() +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))# Plot Phylum Abundances - make sure to load phylum_colors
phylum_df %>%
# Warning: Its important to have one sample per x value,
# Otherwise, it will take the sum between multiple samples
ggplot(aes(x = names, y = Abundance, fill = Phylum)) +
geom_bar(stat = "identity", color = "black") +
labs(title = "Gut Section Phylum Composition") +
scale_fill_manual(values = phylum_colors) +
theme_bw() +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))# Make each Phyla its own row
#fixed scale
phylum_df %>%
ggplot(aes(x = names, y = Abundance, fill = Phylum)) +
facet_grid(Phylum~gut_section) +
geom_bar(stat = "identity", color = "black") +
labs(title = "Gut Section Phylum Composition") +
scale_fill_manual(values = phylum_colors) +
theme_bw() +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))# free scale
phylum_df %>%
ggplot(aes(x = names, y = Abundance, fill = Phylum)) +
facet_grid(Phylum~gut_section, scale = "free") +
geom_bar(stat = "identity", color = "black") +
labs(title = "Gut Section Phylum Composition") +
scale_fill_manual(values = phylum_colors) +
theme_bw() +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))# Narrow in on a specific group
# Bacteroidota - y: abundance, x: gut section, dot plot + boxplot
Bacteroidota_phylum <-
phylum_df %>%
dplyr::filter(Phylum == "Bacteroidota") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance, fill = gut_section, color = gut_section)) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Bacteroidota Phylum Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors) +
stat_compare_means(method = "kruskal.test")
Bacteroidota_phylum# Cyanobacteria - y: abundance, x: gut section, dot plot + boxplot
Cyanobacteria_phylum <-
phylum_df %>%
dplyr::filter(Phylum == "Cyanobacteria") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance, fill = gut_section, color = gut_section)) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Cyanobacteria Phylum Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors) +
stat_compare_means(method = "kruskal.test")
Cyanobacteria_phylum## Warning: Computation failed in `stat_compare_means()`
## Caused by error in `kruskal.test.default()`:
## ! all observations are in the same group
# Desulfobacterota - y: abundance, x: gut section, dot plot + boxplot
Desulfobacterota_phylum <-
phylum_df %>%
dplyr::filter(Phylum == "Desulfobacterota") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance, fill = gut_section, color = gut_section)) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Desulfobacterota Phylum Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors) +
stat_compare_means(method = "kruskal.test")
Desulfobacterota_phylum# Fibrobacterota - y: abundance, x: gut section, dot plot + boxplot
Fibrobacterota_phylum <-
phylum_df %>%
dplyr::filter(Phylum == "Fibrobacterota") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance, fill = gut_section, color = gut_section)) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Fibrobacterota Phylum Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors) +
stat_compare_means(method = "kruskal.test")
Fibrobacterota_phylum## Warning: Computation failed in `stat_compare_means()`
## Caused by error in `kruskal.test.default()`:
## ! all observations are in the same group
# Firmicutes - y: abundance, x: gut section, dot plot + boxplot
Firmicutes_phylum <-
phylum_df %>%
dplyr::filter(Phylum == "Firmicutes") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance, fill = gut_section, color = gut_section)) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Firmicutes Phylum Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors) +
stat_compare_means(method = "kruskal.test")
Firmicutes_phylum# Proteobacteria - y: abundance, x: gut section, dot plot + boxplot
Proteobacteria_phylum <-
phylum_df %>%
dplyr::filter(Phylum == "Proteobacteria") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance, fill = gut_section, color = gut_section)) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Proteobacteria Phylum Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors) +
stat_compare_means(method = "kruskal.test")
Proteobacteria_phylum# Spirochaetota- y: abundance, x: gut section, dot plot + boxplot
Spirochaetota_phylum <-
phylum_df %>%
dplyr::filter(Phylum == "Spirochaetota") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance, fill = gut_section, color = gut_section)) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Spirochaetota Phylum Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors)
# Verrucomicrobiota - y: abundance, x: gut section, dot plot + boxplot
Verrucomicrobiota_phylum <-
phylum_df %>%
dplyr::filter(Phylum == "Verrucomicrobiota") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance, fill = gut_section, color = gut_section)) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Verrucomicrobiota Phylum Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors) +
stat_compare_means(method = "kruskal.test")
Verrucomicrobiota_phylumggarrange(Bacteroidota_phylum, Cyanobacteria_phylum, Desulfobacterota_phylum,
Fibrobacterota_phylum, Firmicutes_phylum, Proteobacteria_phylum,
Spirochaetota_phylum, Verrucomicrobiota_phylum)## Warning: Computation failed in `stat_compare_means()`
## Computation failed in `stat_compare_means()`
## Caused by error in `kruskal.test.default()`:
## ! all observations are in the same group
# Set the phylum colors
family_colors <- c(
Akkermansiaceae = "navy",
Christensenellaceae = "darkslategray2",
Clostridiaceae = "deeppink1",
Cyclobacteriaceae = "plum2",
Desulfovibrionaceae = "gold",
Endozoixomonadaceae = "plum1",
Fibrobacteraceae = "red1",
Lachnospiraceae="black",
Marinifilaceae = "firebrick",
Marinilabiliaceae = "limegreen",
Oscillospiraceae = "grey",
Paludibacteraceae ="magenta",
Peptostreptococcaceae = "darkgreen",
Puniceicoccacaeae = "#3E9B96",
Rikenellaceae = "greenyellow",
"RS-E47 termite group" = "yellow",
Ruminococcaceae = "#B5D6AA",
Shewanellaceae = "palevioletred1",
Spirochaetaceae = "royalblue",
vandinBE97 = "darkorange",
vanfinHA21 = "olivedrab",
Vibrionaceae = "greenyellow",
Victivallaceae = "darkorchid1")family_df <-
scaled_rooted_physeq %>%
# agglomerate at the phylum level
tax_glom(taxrank = "Family") %>%
# Transform counts to relative abundance
transform_sample_counts(function (x) {x/sum(x)}) %>%
# Melt to a long format
psmelt() %>%
# Filter out phyla that are less than one percent - get rid of low abundant Phyla
dplyr::filter(Abundance > 0.01)
# Plot family Abundances
family_df %>%
# Warning: Its important to have one sample per x value,
# Otherwise, it will take the sum between multiple samples
ggplot(aes(x = names, y = Abundance, fill = Family)) +
geom_bar(stat = "identity", color = "black") +
labs(title = "Gut Section Family Composition") +
scale_fill_manual(values = family_colors) +
theme_bw() +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1)) # Bacteroidota
Bacteroidota_family <-
family_df %>%
dplyr::filter(Phylum == "Bacteroidota") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance,
fill = gut_section, color = gut_section)) +
facet_wrap(.~Family, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Bacteroidota Family Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors)
Bacteroidota_family# Desulfobacterota
Desulfobacterota_family <-
family_df %>%
dplyr::filter(Phylum == "Desulfobacterota") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance,
fill = gut_section, color = gut_section)) +
facet_wrap(.~Family, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Desulfobacterota Family Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors) +
stat_compare_means(method = "kruskal.test")
Desulfobacterota_family# Firmicutes
Firmicutes_family <-
family_df %>%
dplyr::filter(Phylum == "Firmicutes") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance,
fill = gut_section, color = gut_section)) +
facet_wrap(.~Family, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Firmicutes Family Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors)
Firmicutes_family# Proteobacteria
Proteobacteria_family <-
family_df %>%
dplyr::filter(Phylum == "Proteobacteria") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance,
fill = gut_section, color = gut_section)) +
facet_wrap(.~Family, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Proteobacteria Family Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors)
# Verrucomicrobiota
Verrucomicrobiota_family <-
family_df %>%
dplyr::filter(Phylum == "Verrucomicrobiota") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance,
fill = gut_section, color = gut_section)) +
facet_wrap(.~Family, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Verrucomicrobiota Family Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors)# Set the phylum colors
Genus_colors <- c(
Akkermansia = "navy",
"Christensenellaceae R-7 group" = "darkslategray2",
Alistipes = "deeppink1",
Aureibacter = "plum2",
Endozoicomonas = "gold",
Desulfobotulus= "magenta4",
Fibrobacter = "red1",
Desulfovibrio="black",
"dgA-11 gut group" = "firebrick",
Epulopiscium = "limegreen",
Ferrimonas = "grey",
Odoribacter ="magenta",
Rikenella = "greenyellow",
"Rikenellaceae RC9 gut group" = "yellow",
Ruminococcus = "#B5D6AA",
Romboustia = "palevioletred1",
Treponema = "royalblue",
Vibrio = "greenyellow",
Other = "darkgray")Genus_df <-
scaled_rooted_physeq %>%
# agglomerate at the phylum level
tax_glom(taxrank = "Genus") %>%
# Transform counts to relative abundance
transform_sample_counts(function (x) {x/sum(x)}) %>%
# Melt to a long format
psmelt() %>%
# Filter out phyla that are less than one percent - get rid of low abundant Phyla
dplyr::filter(Abundance > 0.01)
# Facet by gut section
Genus_df %>%
# Warning: Its important to have one sample per x value,
# Otherwise, it will take the sum between multiple samples
ggplot(aes(x = names, y = Abundance, fill = Genus)) +
facet_grid(.~gut_section) +
geom_bar(stat = "identity", color = "black") +
scale_fill_manual(values = Genus_colors) +
theme_bw() +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))Genus_df %>%
# Warning: Its important to have one sample per x value,
# Otherwise, it will take the sum between multiple samples
ggplot(aes(x = names, y = Abundance, fill = Genus)) +
#facet_grid(.~gut_section) +
geom_bar(stat = "identity", color = "black") +
scale_fill_manual(values = Genus_colors) +
theme_bw() +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))# Bacteroidota
Genus_df %>%
dplyr::filter(Phylum == "Bacteroidota") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance,
fill = gut_section, color = gut_section)) +
facet_wrap(.~Genus, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Bacteroidota genus Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors) # Firmicutes
Genus_df %>%
dplyr::filter(Phylum == "Firmicutes") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance,
fill = gut_section, color = gut_section)) +
facet_wrap(.~Genus, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Firmicutes Genus Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors)# Proteobacteria
Genus_df %>%
dplyr::filter(Phylum == "Proteobacteria") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance,
fill = gut_section, color = gut_section)) +
facet_wrap(.~Genus, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Proteobacteria Genus Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors)For Reproducibility
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 4.3.2 (2023-10-31)
## os macOS Sonoma 14.4.1
## system x86_64, darwin20
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/New_York
## date 2024-05-10
## pandoc 3.1.11 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/x86_64/ (via rmarkdown)
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date (UTC) lib source
## abind 1.4-5 2016-07-21 [2] CRAN (R 4.3.0)
## ade4 1.7-22 2023-02-06 [2] CRAN (R 4.3.0)
## ape 5.7-1 2023-03-13 [2] CRAN (R 4.3.0)
## backports 1.4.1 2021-12-13 [2] CRAN (R 4.3.0)
## Biobase 2.62.0 2023-10-24 [2] Bioconductor
## BiocGenerics 0.48.1 2023-11-01 [2] Bioconductor
## biomformat 1.30.0 2023-10-24 [2] Bioconductor
## Biostrings 2.70.2 2024-01-28 [2] Bioconductor 3.18 (R 4.3.2)
## bitops 1.0-7 2021-04-24 [2] CRAN (R 4.3.0)
## broom 1.0.5 2023-06-09 [2] CRAN (R 4.3.0)
## bslib 0.6.1 2023-11-28 [2] CRAN (R 4.3.0)
## cachem 1.0.8 2023-05-01 [2] CRAN (R 4.3.0)
## car 3.1-2 2023-03-30 [1] CRAN (R 4.3.0)
## carData 3.0-5 2022-01-06 [1] CRAN (R 4.3.0)
## cli 3.6.2 2023-12-11 [2] CRAN (R 4.3.0)
## cluster 2.1.6 2023-12-01 [2] CRAN (R 4.3.0)
## codetools 0.2-19 2023-02-01 [2] CRAN (R 4.3.2)
## colorspace 2.1-0 2023-01-23 [2] CRAN (R 4.3.0)
## cowplot 1.1.3 2024-01-22 [1] CRAN (R 4.3.2)
## crayon 1.5.2 2022-09-29 [2] CRAN (R 4.3.0)
## data.table 1.15.0 2024-01-30 [2] CRAN (R 4.3.2)
## devtools * 2.4.5 2022-10-11 [2] CRAN (R 4.3.0)
## digest 0.6.34 2024-01-11 [2] CRAN (R 4.3.0)
## dplyr * 1.1.4 2023-11-17 [2] CRAN (R 4.3.0)
## ellipsis 0.3.2 2021-04-29 [2] CRAN (R 4.3.0)
## evaluate 0.23 2023-11-01 [2] CRAN (R 4.3.0)
## fansi 1.0.6 2023-12-08 [2] CRAN (R 4.3.0)
## farver 2.1.1 2022-07-06 [2] CRAN (R 4.3.0)
## fastmap 1.1.1 2023-02-24 [2] CRAN (R 4.3.0)
## forcats * 1.0.0 2023-01-29 [2] CRAN (R 4.3.0)
## foreach 1.5.2 2022-02-02 [2] CRAN (R 4.3.0)
## fs 1.6.3 2023-07-20 [2] CRAN (R 4.3.0)
## generics 0.1.3 2022-07-05 [2] CRAN (R 4.3.0)
## GenomeInfoDb 1.38.6 2024-02-08 [2] Bioconductor 3.18 (R 4.3.2)
## GenomeInfoDbData 1.2.11 2023-11-13 [2] Bioconductor
## ggplot2 * 3.4.4 2023-10-12 [1] CRAN (R 4.3.0)
## ggpubr * 0.6.0 2023-02-10 [1] CRAN (R 4.3.0)
## ggsignif 0.6.4 2022-10-13 [1] CRAN (R 4.3.0)
## glue 1.7.0 2024-01-09 [2] CRAN (R 4.3.0)
## gtable 0.3.4 2023-08-21 [2] CRAN (R 4.3.0)
## highr 0.10 2022-12-22 [2] CRAN (R 4.3.0)
## hms 1.1.3 2023-03-21 [2] CRAN (R 4.3.0)
## htmltools 0.5.7 2023-11-03 [2] CRAN (R 4.3.0)
## htmlwidgets 1.6.4 2023-12-06 [2] CRAN (R 4.3.0)
## httpuv 1.6.14 2024-01-26 [2] CRAN (R 4.3.2)
## igraph 2.0.1.1 2024-01-30 [2] CRAN (R 4.3.2)
## IRanges 2.36.0 2023-10-24 [2] Bioconductor
## iterators 1.0.14 2022-02-05 [2] CRAN (R 4.3.0)
## jquerylib 0.1.4 2021-04-26 [2] CRAN (R 4.3.0)
## jsonlite 1.8.8 2023-12-04 [2] CRAN (R 4.3.0)
## knitr 1.45 2023-10-30 [2] CRAN (R 4.3.0)
## labeling 0.4.3 2023-08-29 [2] CRAN (R 4.3.0)
## later 1.3.2 2023-12-06 [2] CRAN (R 4.3.0)
## lattice * 0.22-5 2023-10-24 [2] CRAN (R 4.3.0)
## lifecycle 1.0.4 2023-11-07 [2] CRAN (R 4.3.0)
## lubridate * 1.9.3 2023-09-27 [2] CRAN (R 4.3.0)
## magrittr 2.0.3 2022-03-30 [2] CRAN (R 4.3.0)
## MASS 7.3-60.0.1 2024-01-13 [2] CRAN (R 4.3.0)
## Matrix 1.6-5 2024-01-11 [2] CRAN (R 4.3.0)
## memoise 2.0.1 2021-11-26 [2] CRAN (R 4.3.0)
## mgcv 1.9-1 2023-12-21 [2] CRAN (R 4.3.0)
## mime 0.12 2021-09-28 [2] CRAN (R 4.3.0)
## miniUI 0.1.1.1 2018-05-18 [2] CRAN (R 4.3.0)
## multtest 2.58.0 2023-10-24 [2] Bioconductor
## munsell 0.5.0 2018-06-12 [2] CRAN (R 4.3.0)
## nlme 3.1-164 2023-11-27 [2] CRAN (R 4.3.0)
## pacman 0.5.1 2019-03-11 [1] CRAN (R 4.3.0)
## patchwork * 1.2.0.9000 2024-05-07 [1] Github (thomasp85/patchwork@d943757)
## permute * 0.9-7 2022-01-27 [2] CRAN (R 4.3.0)
## phyloseq * 1.46.0 2023-10-24 [2] Bioconductor
## pillar 1.9.0 2023-03-22 [2] CRAN (R 4.3.0)
## pkgbuild 1.4.3 2023-12-10 [2] CRAN (R 4.3.0)
## pkgconfig 2.0.3 2019-09-22 [2] CRAN (R 4.3.0)
## pkgload 1.3.4 2024-01-16 [2] CRAN (R 4.3.0)
## plyr 1.8.9 2023-10-02 [1] CRAN (R 4.3.0)
## profvis 0.3.8 2023-05-02 [2] CRAN (R 4.3.0)
## promises 1.2.1 2023-08-10 [2] CRAN (R 4.3.0)
## purrr * 1.0.2 2023-08-10 [2] CRAN (R 4.3.0)
## R6 2.5.1 2021-08-19 [2] CRAN (R 4.3.0)
## Rcpp 1.0.12 2024-01-09 [2] CRAN (R 4.3.0)
## RCurl 1.98-1.14 2024-01-09 [2] CRAN (R 4.3.0)
## readr * 2.1.5 2024-01-10 [2] CRAN (R 4.3.0)
## remotes 2.4.2.1 2023-07-18 [2] CRAN (R 4.3.0)
## reshape2 1.4.4 2020-04-09 [2] CRAN (R 4.3.0)
## rhdf5 2.46.1 2023-11-29 [2] Bioconductor
## rhdf5filters 1.14.1 2023-11-06 [2] Bioconductor
## Rhdf5lib 1.24.2 2024-02-07 [2] Bioconductor 3.18 (R 4.3.2)
## rlang 1.1.3 2024-01-10 [2] CRAN (R 4.3.0)
## rmarkdown 2.25 2023-09-18 [2] CRAN (R 4.3.0)
## rstatix * 0.7.2 2023-02-01 [1] CRAN (R 4.3.0)
## rstudioapi 0.15.0 2023-07-07 [2] CRAN (R 4.3.0)
## S4Vectors 0.40.2 2023-11-23 [2] Bioconductor
## sass 0.4.8 2023-12-06 [2] CRAN (R 4.3.0)
## scales 1.3.0 2023-11-28 [2] CRAN (R 4.3.0)
## sessioninfo 1.2.2 2021-12-06 [2] CRAN (R 4.3.0)
## shiny 1.8.0 2023-11-17 [2] CRAN (R 4.3.0)
## stringi 1.8.3 2023-12-11 [2] CRAN (R 4.3.0)
## stringr * 1.5.1 2023-11-14 [2] CRAN (R 4.3.0)
## survival 3.5-7 2023-08-14 [2] CRAN (R 4.3.0)
## tibble * 3.2.1 2023-03-20 [2] CRAN (R 4.3.0)
## tidyr * 1.3.1 2024-01-24 [2] CRAN (R 4.3.2)
## tidyselect 1.2.0 2022-10-10 [2] CRAN (R 4.3.0)
## tidyverse * 2.0.0 2023-02-22 [1] CRAN (R 4.3.0)
## timechange 0.3.0 2024-01-18 [2] CRAN (R 4.3.0)
## tzdb 0.4.0 2023-05-12 [2] CRAN (R 4.3.0)
## urlchecker 1.0.1 2021-11-30 [2] CRAN (R 4.3.0)
## usethis * 2.2.2 2023-07-06 [2] CRAN (R 4.3.0)
## utf8 1.2.4 2023-10-22 [2] CRAN (R 4.3.0)
## vctrs 0.6.5 2023-12-01 [2] CRAN (R 4.3.0)
## vegan * 2.6-4 2022-10-11 [1] CRAN (R 4.3.0)
## withr 3.0.0 2024-01-16 [2] CRAN (R 4.3.0)
## xfun 0.42 2024-02-08 [2] CRAN (R 4.3.2)
## xtable 1.8-4 2019-04-21 [2] CRAN (R 4.3.0)
## XVector 0.42.0 2023-10-24 [2] Bioconductor
## yaml 2.3.8 2023-12-11 [2] CRAN (R 4.3.0)
## zlibbioc 1.48.0 2023-10-24 [2] Bioconductor
##
## [1] /Users/cab565/Library/R/x86_64/4.3/library
## [2] /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library
##
## ──────────────────────────────────────────────────────────────────────────────